# Remove every object that ls() lists in the current environment so the script
# does not pick up leftovers from an earlier run.
# NOTE(review): when this file is source()d into an interactive session it
# also deletes the caller's own objects; attached packages and options() are
# not reset. Consider running the script in a fresh R process instead.
rm(list = ls())

library(data.table)
library(estimatr)

# Wrap each element of `x` in backticks so that non-syntactic column names
# (for example names containing dashes or spaces) can be pasted into a formula
# string.
#
# x: character vector of names.
# Returns a character vector of the same length as `x`. Zero-length input
# yields character(0); without the guard paste0() would recycle the empty
# vector to "" and return the bogus name "``".
add_backticks <- function(x) {
  if (length(x) == 0L) {
    return(character(0))
  }
  paste0("`", x, "`")
}

# Build the right-hand side of a model formula from a vector of column names:
# each name is backtick-quoted with add_backticks() and the quoted terms are
# joined with " + " into a single string.
x_lm_formula <- function(x) {
  quoted_terms <- add_backticks(x)
  paste(quoted_terms, collapse = " + ")
}


# Load the prepared data. Everything below uses an object called `panel` with
# data.table syntax, so the .RData file is expected to provide it.
load("./data/panel_month_dummies.RData")

# Columns whose name contains "month_2" (presumably the month dummies).
month_cols <- grep("month_2", colnames(panel), value = TRUE)

# less_college = 100 - educ_diplBA; then, for every month column, add
# educ_<month column> = month column * less_college.
panel[, less_college := 100 - educ_diplBA]
panel[, (paste0("educ_", month_cols)) := lapply(
  .SD,
  function(dummy) dummy * panel[["less_college"]]
), .SDcols = month_cols]

# Same construction with foreign_pop: foreign_<month column>.
panel[, (paste0("foreign_", month_cols)) := lapply(
  .SD,
  function(dummy) dummy * panel[["foreign_pop"]]
), .SDcols = month_cols]

# Collect the interaction columns by name pattern and drop element 157 of each
# vector.
# NOTE(review): 157 is a hard-coded position, presumably the omitted reference
# month. A negative index larger than the vector length drops nothing and
# gives no warning -- confirm it matches the number of month columns.
educ_cols <- grep("educ_month", colnames(panel), value = TRUE)[-157]
foreign_cols <- grep("foreign_month", colnames(panel), value = TRUE)[-157]
flex_controls <- c(educ_cols, foreign_cols)

# time: difference between `month` and 2020-01-01 divided by the average month
# length (365.25 / 12), rounded and stored as a plain number.
panel[, time := {
  since_2020 <- month - as.Date("2020-01-01", format = "%Y-%m-%d")
  as.numeric(round(since_2020 / (365.25 / 12)))
}]

# trend_<province column> = province column * time; the first matching trend
# column is left out of trend_cols.
province_cols <- grep("code_province_", colnames(panel), value = TRUE)
panel[, (paste0("trend_", province_cols)) := lapply(
  .SD,
  function(dummy) dummy * panel[["time"]]
), .SDcols = province_cols]
trend_cols <- grep("trend_code_province", colnames(panel), value = TRUE)[-1]

# All four models regress hc_pc_asians on covid_econ with panelvar and month
# fixed effects and standard errors clustered on panelvar (se_type "stata").
# Each fit is written to ./output; the first three are removed from the
# workspace right after saving.

# (1) No additional controls.
out_econ <- lm_robust(
  hc_pc_asians ~ covid_econ,
  data = panel,
  clusters = panelvar,
  fixed_effects = ~ panelvar + month,
  se_type = "stata"
)
save(out_econ, file = "./output/out_econ.RData")
rm(out_econ)

# (2) Adds the education and foreign-population interactions in flex_controls.
formula_flex_controls <- as.formula(
  paste0("hc_pc_asians ~ covid_econ + ", x_lm_formula(flex_controls))
)
out_econ_con <- lm_robust(
  formula = formula_flex_controls,
  data = panel,
  clusters = panelvar,
  fixed_effects = ~ panelvar + month,
  se_type = "stata"
)
save(out_econ_con, file = "./output/out_econ_con.RData")
rm(out_econ_con)

# (3) Adds trend_econ = time * losing_income_d.
panel[, trend_econ := time * losing_income_d]
formula_flex_controls_trend <- as.formula(
  paste0("hc_pc_asians ~ covid_econ + trend_econ + ", x_lm_formula(flex_controls))
)
out_econ_con_trend <- lm_robust(
  formula = formula_flex_controls_trend,
  data = panel,
  clusters = panelvar,
  fixed_effects = ~ panelvar + month,
  se_type = "stata"
)
save(out_econ_con_trend, file = "./output/out_econ_con_trend.RData")
rm(out_econ_con_trend)

# (4) Adds the province trend columns; flex_controls and formula_flex_controls
# are overwritten with the extended specification.
flex_controls <- c(educ_cols, foreign_cols, trend_cols)
formula_flex_controls <- as.formula(
  paste0("hc_pc_asians ~ covid_econ + trend_econ + ", x_lm_formula(flex_controls))
)
out_econ_con_trend_all <- lm_robust(
  formula = formula_flex_controls,
  data = panel,
  clusters = panelvar,
  fixed_effects = ~ panelvar + month,
  se_type = "stata"
)
save(out_econ_con_trend_all, file = "./output/out_econ_con_trend_all.RData")

## To produce the output for row 'Average Hate Crimes' in Panel B.

# Keep the rows where both hc_pc_asians and losing_income_d are observed, then
# drop the full panel from the workspace.
panel_econ <- panel[!is.na(hc_pc_asians) & !is.na(losing_income_d)]
rm(panel)
panel_econ[, treat := covid2 * losing_income_d]

# panel_econ inherits every column of `panel`, including the educ_month_* and
# foreign_month_* interactions created earlier in this script. The patterns
# are therefore anchored with "^": an unanchored 'month_2' would also match
# those derived columns, produce interactions of interactions
# (educ_educ_month_*, foreign_educ_month_*, ...) and add them to the control
# set, so the weights would no longer come from the specification used in the
# hc_pc_asians regressions.
# NOTE(review): assumes the raw month columns start with "month_2" -- confirm
# against the column names in panel_month_dummies.RData.
month_cols <- grep("^month_2", colnames(panel_econ), value = TRUE)

# Rebuild the interactions on the subsample: month column * less_college and
# month column * foreign_pop.
panel_econ[, less_college := (100 - educ_diplBA)]
panel_econ[, (paste("educ", month_cols, sep = "_")) := lapply(.SD, function(x)
  x * panel_econ[["less_college"]]), .SDcols = month_cols]

panel_econ[, (paste("foreign", month_cols, sep = "_")) := lapply(.SD, function(x)
  x * panel_econ[["foreign_pop"]]), .SDcols = month_cols]

# Collect the interaction columns and drop element 157 of each vector.
# NOTE(review): 157 is a hard-coded position, presumably the omitted reference
# month; a negative index beyond the vector length drops nothing -- confirm.
educ_cols <- grep("^educ_month", colnames(panel_econ), value = TRUE)
educ_cols <- educ_cols[-157]
foreign_cols <- grep("^foreign_month", colnames(panel_econ), value = TRUE)
foreign_cols <- foreign_cols[-157]
flex_controls <- c(educ_cols, foreign_cols)

# Helper for the specifications below. Takes a fitted model of `treat`, stores
# the squared difference between panel_econ$treat and the model's fitted values
# in panel_econ$effweight (overwritten on every call), and returns the
# effweight-weighted mean of hc_pc_asians by covid2 among rows with
# losing_income_d == 0.
effective_mean_by_covid <- function(fit) {
  weight <- (panel_econ$treat - fit$fitted.values)^2
  panel_econ[, effweight := weight]
  panel_econ[
    losing_income_d == 0,
    sum(effweight * hc_pc_asians) / sum(effweight),
    by = covid2
  ]
}

# Weights from a model without fixed effects or controls.
effective_sample_econ <- effective_mean_by_covid(
  lm_robust(treat ~ losing_income_d + covid2, data = panel_econ)
)

# Weights from a model with panelvar and month fixed effects.
effective_sample_econ_fe <- effective_mean_by_covid(
  lm_robust(
    treat ~ losing_income_d + covid2,
    data = panel_econ,
    fixed_effects = ~ panelvar + month,
    se_type = "stata"
  )
)

# Unweighted mean of hc_pc_asians for the same rows, by covid2.
nominal_sample_econ <- panel_econ[
  losing_income_d == 0,
  mean(hc_pc_asians),
  by = covid2
]

# Fixed effects plus the interactions in flex_controls.
formula_flex_controls <- as.formula(
  paste0("treat ~ losing_income_d + covid2 + ", x_lm_formula(flex_controls))
)
effective_sample_econ_fe_con <- effective_mean_by_covid(
  lm_robust(
    formula = formula_flex_controls,
    data = panel_econ,
    fixed_effects = ~ panelvar + month,
    se_type = "stata"
  )
)

# time: difference between `month` and 2020-01-01 divided by the average month
# length, rounded; trend_econ = time * losing_income_d.
panel_econ[, time := {
  since_2020 <- month - as.Date("2020-01-01", format = "%Y-%m-%d")
  as.numeric(round(since_2020 / (365.25 / 12)))
}]
panel_econ[, trend_econ := time * losing_income_d]

# Fixed effects, flex_controls and trend_econ.
formula_flex_controls_trend <- as.formula(
  paste0(
    "treat ~ losing_income_d + covid2 + trend_econ + ",
    x_lm_formula(flex_controls)
  )
)
effective_sample_econ_fe_con_trend <- effective_mean_by_covid(
  lm_robust(
    formula = formula_flex_controls_trend,
    data = panel_econ,
    fixed_effects = ~ panelvar + month,
    se_type = "stata"
  )
)

# Province trend columns: trend_<province column> = province column * time.
# panel_econ already carries the trend_code_province_* columns that were built
# on `panel` earlier in this script, so both patterns are anchored with "^".
# Unanchored, 'code_province_' also matches those trend columns, which creates
# trend_trend_code_province_* (province * time^2) columns that the second grep
# then adds to the controls.
# NOTE(review): assumes the raw province columns start with "code_province_"
# -- confirm against the column names in the input data.
province_cols <- grep("^code_province_", colnames(panel_econ), value = TRUE)
panel_econ[, (paste("trend", province_cols, sep = "_")) := lapply(.SD, function(x)
  x * panel_econ[["time"]]), .SDcols = province_cols]
trend_cols <- grep("^trend_code_province", colnames(panel_econ), value = TRUE)
# Drop the first trend column from the control set.
trend_cols <- trend_cols[-1]

flex_controls <- c(educ_cols, foreign_cols, trend_cols)

formula_flex_controls <- as.formula(paste("treat ~ losing_income_d + covid2 + trend_econ +", x_lm_formula(flex_controls)))

# Model of treat with panelvar and month fixed effects, trend_econ and all
# controls in flex_controls.
mod <- lm_robust(formula = formula_flex_controls,
                 data = panel_econ, se_type = "stata",
                 fixed_effects = ~ panelvar + month)

# Weight = squared difference between treat and the model's fitted values.
weight <- ((panel_econ$treat - mod$fitted.values))^2
panel_econ[, effweight := weight]

# Weighted mean of hc_pc_asians by covid2 among rows with losing_income_d == 0.
effective_sample_econ_fe_con_trend_prov <- panel_econ[losing_income_d == 0, sum(effweight * hc_pc_asians) / sum(effweight), by = covid2]

rm(mod)

# Write the nominal and all effective-sample summaries to one file.
save(nominal_sample_econ, effective_sample_econ, effective_sample_econ_fe, effective_sample_econ_fe_con,
     effective_sample_econ_fe_con_trend, effective_sample_econ_fe_con_trend_prov, file = './output/effective_sample_data.RData')